

// ---------------------------------------------------------------------------
// Session setup and run parameters.
// All globals below are read by the rest of this do-file; several are also
// passed as arguments to construct_base_4decile.
// ---------------------------------------------------------------------------
cls


// Add the project ado directory to Stata's search path.
adopath + "D:\data\ado"

// Name of the variable holding the concorded product categories.
// (This global used to be defined twice in this header; it is now set once.)
global product id_conc

// First and last year; both are part of the transaction file name used below.
global firstyear 2011
global lastyear 2017

// Date tag, also part of the transaction file name.
global date march2021

// Two-letter country codes passed to construct_base_4decile.
global countrysample "BE BO BR CA CH CL CN CR DE DO EC ES GB GT IN IT JP KR MZ NL PA PE PR RU SV US VE"

// First constraint passed to construct_base_4decile. Left blank here.
// In a robustness check, use "drop if firstyear_panel!=firstyear_sample"
// instead, to deal with left censoring.
global time_constraint " "

// Second constraint: drop right-censored seller-buyer relationships.
global time_constraint2 "drop if firstyear_sample>2016"

	
// Run the helper do-files from the directory stored in $adop.
// Presumably they define the programs called further down (id_group,
// construct_base_4decile) -- confirm against those files.
// Paths built from globals are quoted so that directories containing spaces
// do not break the command.
cd "$adop"
do id_group
do construct_base_4decile
do estimate_bydecile

// Load the transaction file for the chosen years/date tag and store a copy
// under $firmpath as transaction_colombia.
cd "$datapath/colombia"
use "transaction_all_id_conc$firstyear$lastyear$date", clear
save "$firmpath/transaction_colombia", replace

// Build base_colombia from transaction_colombia with the two time constraints
// and the country sample set in the header.
// A further argument, nomulti, is available but deliberately not passed here.
construct_base_4decile transaction_colombia base_colombia "$time_constraint" "$time_constraint2" whole "$countrysample"

// ---------------------------------------------------------------------------
// Build base4reg_col: one observation per (continuous relationship id,
// product, country, hs4) with mean size and duration, demeaned log size,
// and a within-product size decile.
// ---------------------------------------------------------------------------
cd "$firmpath"
use base_colombia, clear

// Normalise the product code to a string: round-trip through numeric, then
// put back the leading zero on 5-character codes so that every code has 6
// characters before the first 4 are taken as hs4.
// The product variable is referenced through $product throughout, so that
// changing the global is enough to switch product classification.
destring $product, replace
tostring $product, replace
replace $product = "0" + $product if length($product) == 5
g hs4 = substr($product, 1, 4)

// Re-number buyer/seller identifiers within country x product.
// id_group does the same job as egen group (but works if we have millions of
// groups). It writes the new id to the variable named in its first argument
// (here "_"), which then replaces the original identifier.
sort iso2 id_bp id_bs id_bs_continuous
id_group _ "iso2 $product id_bp"
drop id_bp
rename _ id_bp
id_group _ "iso2 $product id_bs"
drop id_bs
rename _ id_bs
id_group _ "iso2 $product id_bs_continuous"
drop id_bs_continuous
rename _ id_bs_continuous

g dur = length_bps
// size used to be totx/dur, but this creates some issues as firms do not need
// inputs every month.
g size = export
collapse (mean) size dur, by(id_bs_continuous $product iso2 hs4)

// Log size, demeaned within product.
g lsize = log(size)
egen _ = mean(lsize), by($product)
g lsize_ = lsize - _

// 1st and 99th percentiles of demeaned log size within product; used for the
// trim at the end of this block.
egen min = pctile(lsize_), by($product) p(1)
egen max = pctile(lsize_), by($product) p(99)

// Keep only products with more than 100 non-missing size observations.
egen nb = count(size), by($product)
keep if nb > 100

// Deciles are computed by product only (not product x iso2) and BEFORE the
// tails are trimmed, so deciles 1 and 10 end up covering less than a full
// tenth of the observations once the trim is applied.
// NOTE(review): xtile() is not a built-in egen function; it appears to come
// from the user-written egenmore package -- confirm it is installed.
egen decile = xtile(lsize_), by($product) nq(10)

// Trim below p1 and above p99. Stata treats missing as larger than any
// number, so rows with missing lsize_ are also dropped whenever max is
// non-missing.
drop if lsize_ < min | lsize_ > max
save base4reg_col, replace

// ---------------------------------------------------------------------------
// Estimate relationship stickiness by hs4 and save it.
// base4reg_col is opened with a relative path, so this assumes the working
// directory is still the one it was saved in.
// ---------------------------------------------------------------------------
use base4reg_col, clear

// Local macro `2' holds the name of the outcome variable (dur); every line
// below refers to the outcome through it.
local 2 dur

// Cell means by decile x hs4 x product; nb counts the observations per cell.
collapse (mean) size `2' (count) nb=size, by(decile hs4 $product)
g l`2' = log(`2')

// Regressor built from the decile index. The general formula is missing for
// decile 10 (log of zero), and both extreme deciles are overwritten just below.
g fdecile = log(log(11 - decile) - log(10 - decile))
// Decile 1: size from pctile 0.01 to pctile 0.1.
replace fdecile = log(log(1.1)) if decile == 1
// Decile 10: size from pctile 0.9 to pctile 0.99.
replace fdecile = log(log(10)) if decile == 10
// Rescale the outcome in the extreme deciles to account for the fact that the
// top 1% and bottom 1% were dropped.
replace l`2' = log(`2'/0.9) if decile == 1 | decile == 10

// Baseline regression (eq. 3 of the paper): hs4 fixed effects are absorbed
// and stored in rs.
egen p = group(hs4)
reghdfe l`2' fdecile, a(rs=p)

// One row per hs4: mean stored fixed effect, mean outcome, total observations.
collapse (mean) rs `2' (sum) nb, by(hs4)
label var rs "Relationship stickiness"
rename rs rs_colombia
// The path is quoted so that a $rspath containing spaces does not break save.
save "$rspath/stickiness_colombia", replace
